/*
 * stdaln.c -- standard alignment (local and banded global alignment)
 *
 * Copyright (c) 2003-2006, Li Heng <liheng@genomics.org.cn>
 *                                  <lh3lh3@gmail.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "stdaln.h"

/* char -> 17 (=16+1) nucleotides */
unsigned char aln_nt16_table[256] = {
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,16 /*'-'*/,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
                                        15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
                                        15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
                                        15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                                        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
                                    };
char *aln_nt16_rev_table = "XAGRCMSVTWKDYHBN-";

/* char -> 5 (=4+1) nucleotides */
unsigned char aln_nt4_table[256] = {
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /*'-'*/, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                                       4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
                                   };
char *aln_nt4_rev_table = "AGCTN-";

/* char -> 22 (=20+1+1) amino acids */
unsigned char aln_aa_table[256] = {
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,20,21, 21,22 /*'-'*/,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
                                      14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
                                      21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
                                      14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
                                      21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21
                                  };
char *aln_aa_rev_table = "ARNDCQEGHILKMFPSTWYV*X-";
/* 01234567890123456789012 */

/* translation table. They are useless in stdaln.c, but when you realize you need it, you need not write the table again. */
unsigned char aln_trans_table_eu[66] = {
                                           11,11, 2, 2,  1, 1,15,15, 16,16,16,16,  9,12, 9, 9,
                                           6, 6, 3, 3,  7, 7, 7, 7,  0, 0, 0, 0, 19,19,19,19,
                                           5, 5, 8, 8,  1, 1, 1, 1, 14,14,14,14, 10,10,10,10,
                                           20,20,18,18, 20,17, 4, 4, 15,15,15,15, 10,10,13,13, 21, 22
                                       };
char *aln_trans_table_eu_char = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFFX";
/* 01234567890123456789012345678901234567890123456789012345678901234 */
int aln_sm_blosum62[] = {
                            /*  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
                            4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
                            -1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
                            -2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
                            -2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
                            0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
                            -1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
                            -1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
                            0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
                            -2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
                            -1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
                            -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
                            -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
                            -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
                            -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
                            -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
                            1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
                            0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
                            -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
                            -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
                            0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
                            -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
                            0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
                        };

int aln_sm_blosum45[] = {
                            /*  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
                            5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-2,-2, 0,-5, 0,
                            -2, 7, 0,-1,-3, 1, 0,-2, 0,-3,-2, 3,-1,-2,-2,-1,-1,-2,-1,-2,-5,-1,
                            -1, 0, 6, 2,-2, 0, 0, 0, 1,-2,-3, 0,-2,-2,-2, 1, 0,-4,-2,-3,-5,-1,
                            -2,-1, 2, 7,-3, 0, 2,-1, 0,-4,-3, 0,-3,-4,-1, 0,-1,-4,-2,-3,-5,-1,
                            -1,-3,-2,-3,12,-3,-3,-3,-3,-3,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1,-5,-2,
                            -1, 1, 0, 0,-3, 6, 2,-2, 1,-2,-2, 1, 0,-4,-1, 0,-1,-2,-1,-3,-5,-1,
                            -1, 0, 0, 2,-3, 2, 6,-2, 0,-3,-2, 1,-2,-3, 0, 0,-1,-3,-2,-3,-5,-1,
                            0,-2, 0,-1,-3,-2,-2, 7,-2,-4,-3,-2,-2,-3,-2, 0,-2,-2,-3,-3,-5,-1,
                            -2, 0, 1, 0,-3, 1, 0,-2,10,-3,-2,-1, 0,-2,-2,-1,-2,-3, 2,-3,-5,-1,
                            -1,-3,-2,-4,-3,-2,-3,-4,-3, 5, 2,-3, 2, 0,-2,-2,-1,-2, 0, 3,-5,-1,
                            -1,-2,-3,-3,-2,-2,-2,-3,-2, 2, 5,-3, 2, 1,-3,-3,-1,-2, 0, 1,-5,-1,
                            -1, 3, 0, 0,-3, 1, 1,-2,-1,-3,-3, 5,-1,-3,-1,-1,-1,-2,-1,-2,-5,-1,
                            -1,-1,-2,-3,-2, 0,-2,-2, 0, 2, 2,-1, 6, 0,-2,-2,-1,-2, 0, 1,-5,-1,
                            -2,-2,-2,-4,-2,-4,-3,-3,-2, 0, 1,-3, 0, 8,-3,-2,-1, 1, 3, 0,-5,-1,
                            -1,-2,-2,-1,-4,-1, 0,-2,-2,-2,-3,-1,-2,-3, 9,-1,-1,-3,-3,-3,-5,-1,
                            1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-3,-1,-2,-2,-1, 4, 2,-4,-2,-1,-5, 0,
                            0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-1,-1, 2, 5,-3,-1, 0,-5, 0,
                            -2,-2,-4,-4,-5,-2,-3,-2,-3,-2,-2,-2,-2, 1,-3,-4,-3,15, 3,-3,-5,-2,
                            -2,-1,-2,-2,-3,-1,-2,-3, 2, 0, 0,-1, 0, 3,-3,-2,-1, 3, 8,-1,-5,-1,
                            0,-2,-3,-3,-1,-3,-3,-3,-3, 3, 1,-2, 1, 0,-3,-1, 0,-3,-1, 5,-5,-1,
                            -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 1,-5,
                            0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 0,-2,-1,-1,-5,-1
                        };

int aln_sm_nt[] = {
                      /*  X  A  G  R  C  M  S  V  T  W  K  D  Y  H  B  N */
                      -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
                      -2, 2,-1, 1,-2, 1,-2, 0,-2, 1,-2, 0,-2, 0,-2, 0,
                      -2,-1, 2, 1,-2,-2, 1, 0,-2,-2, 1, 0,-2,-2, 0, 0,
                      -2, 1, 1, 1,-2,-1,-1, 0,-2,-1,-1, 0,-2, 0, 0, 0,
                      -2,-2,-2,-2, 2, 1, 1, 0,-1,-2,-2,-2, 1, 0, 0, 0,
                      -2, 1,-2,-1, 1, 1,-1, 0,-2,-1,-2, 0,-1, 0, 0, 0,
                      -2,-2, 1,-1, 1,-1, 1, 0,-2,-2,-1, 0,-1, 0, 0, 0,
                      -2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,
                      -2,-2,-2,-2,-1,-2,-2,-2, 2, 1, 1, 0, 1, 0, 0, 0,
                      -2, 1,-2,-1,-2,-1,-2, 0, 1, 1,-1, 0,-1, 0, 0, 0,
                      -2,-2, 1,-1,-2,-2,-1, 0, 1,-1, 1, 0,-1, 0, 0, 0,
                      -2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      -2,-2,-2,-2, 1,-1,-1, 0, 1,-1,-1, 0, 1, 0, 0, 0,
                      -2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      -2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                  };

int aln_sm_read[] = {
                        /*   X   A   G   R   C   M   S   V   T   W   K   D   Y   H   B   N  */
                        -17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,
                        -17,  2,-17,  1,-17,  1,-17,  0,-17,  1,-17,  0,-17,  0,-17,  0,
                        -17,-17,  2,  1,-17,-17,  1,  0,-17,-17,  1,  0,-17,-17,  0,  0,
                        -17,  1,  1,  1,-17,-17,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
                        -17,-17,-17,-17,  2,  1,  1,  0,-17,-17,-17,-17,  1,  0,  0,  0,
                        -17,  1,-17,-17,  1,  1,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
                        -17,-17,  1,-17,  1,-17,  1,  0,-17,-17,-17,  0,-17,  0,  0,  0,
                        -17,  0,  0,  0,  0,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,
                        -17,-17,-17,-17,-17,-17,-17,-17,  2,  1,  1,  0,  1,  0,  0,  0,
                        -17,  1,-17,-17,-17,-17,-17,  0,  1,  1,-17,  0,-17,  0,  0,  0,
                        -17,-17,  1,-17,-17,-17,-17,  0,  1,-17,  1,  0,-17,  0,  0,  0,
                        -17,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
                        -17,-17,-17,-17,  1,-17,-17,  0,  1,-17,-17,  0,  1,  0,  0,  0,
                        -17,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
                        -17,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
                        -17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
                    };

int aln_sm_hs[] = {
                      /*     A    G    C    T    N */
                      91, -31,-114,-123, -44,
                      -31, 100,-125,-114, -42,
                      -123,-125, 100, -31, -42,
                      -114,-114, -31,  91, -42,
                      -44, -42, -42, -42, -43
                  };

/********************/
/* START OF align.c */
/********************/

AlnParam aln_param_nt2nt   = { 10,  2,  2, aln_sm_nt, 16, 75 };
AlnParam aln_param_rd2rd   = { 20, 19, 19, aln_sm_read, 16, 75 };
AlnParam aln_param_aa2aa   = { 12,  2,  2, aln_sm_blosum62, 22, 50 };

AlnAln *aln_init_AlnAln()
{
    AlnAln *aa;
    aa = (AlnAln*)MYALLOC(sizeof(AlnAln));
    aa->path = 0;
    aa->out1 = aa->out2 = aa->outm = 0;
    aa->path_len = 0;
    return aa;
}
void aln_free_AlnAln(AlnAln *aa)
{
    MYFREE(aa->path);
    MYFREE(aa->out1);
    MYFREE(aa->out2);
    MYFREE(aa->outm);
    MYFREE(aa);
}

/***************************/
/* START OF common_align.c */
/***************************/

#define LOCAL_OVERFLOW_THRESHOLD 32000
#define LOCAL_OVERFLOW_REDUCE 16000
#define NT_LOCAL_SCORE int
#define NT_LOCAL_SHIFT 16
#define NT_LOCAL_MASK 0xffff

#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;

#define set_M(MM, cur, p, sc)       \
{              \
 if ((p)->M >= (p)->I) {        \
  if ((p)->M >= (p)->D) {       \
   (MM) = (p)->M + (sc); (cur)->Mt = FROM_M; \
  } else {          \
   (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
  }            \
 } else {           \
  if ((p)->I > (p)->D) {       \
   (MM) = (p)->I + (sc); (cur)->Mt = FROM_I; \
  } else {          \
   (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
  }            \
 }             \
}
#define set_I(II, cur, p)        \
{              \
 if ((p)->M - gap_open > (p)->I - gap_ext) {   \
  (cur)->It = FROM_M;        \
  (II) = (p)->M - gap_open;      \
 } else {           \
  (cur)->It = FROM_I;        \
  (II) = (p)->I - gap_ext;      \
 }             \
}
#define set_end_I(II, cur, p)       \
{              \
 if (gap_end >= 0) {         \
  if ((p)->M > (p)->I) {       \
   (cur)->It = FROM_M;       \
   (II) = (p)->M - gap_end;     \
  } else {          \
   (cur)->It = FROM_I;       \
   (II) = (p)->I - gap_end;     \
  }            \
 } else set_I(II, cur, p);       \
}
#define set_D(DD, cur, p)        \
{              \
 if ((p)->M - gap_open > (p)->D - gap_ext) {   \
  (cur)->Dt = FROM_M;        \
  (DD) = (p)->M - gap_open;      \
 } else {           \
  (cur)->Dt = FROM_D;        \
  (DD) = (p)->D - gap_ext;      \
 }             \
}
#define set_end_D(DD, cur, p)       \
{              \
 if (gap_end >= 0) {         \
  if ((p)->M > (p)->D) {       \
   (cur)->Dt = FROM_M;       \
   (DD) = (p)->M - gap_end;     \
  } else {          \
   (cur)->Dt = FROM_D;       \
   (DD) = (p)->D - gap_end;     \
  }            \
 } else set_D(DD, cur, p);       \
}

typedef struct
{

unsigned char Mt:

3, It:

2, Dt:
    2;
}
dpcell_t;

typedef struct
{
    int M, I, D;
}
dpscore_t;

/* build score profile for accelerating alignment, in theory */
void aln_init_score_array(unsigned char *seq, int len, int row, int *score_matrix, int **s_array)
{
    int *tmp, *tmp2, i, k;

    for (i = 0; i != row; ++i)
    {
        tmp = score_matrix + i * row;
        tmp2 = s_array[i];

        for (k = 0; k != len; ++k)
            tmp2[k] = tmp[seq[k]];
    }
}
/***************************
 * banded global alignment *
 ***************************/
int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
                    path_t *path, int *path_len)
{
    register int i, j;
    dpcell_t **dpcell, *q;
    dpscore_t *curr, *last, *s;
    path_t *p;
    int b1, b2, tmp_end;
    int *mat, end, max;
    unsigned char type, ctype;

    int gap_open, gap_ext, gap_end, b;
    int *score_matrix, N_MATRIX_ROW;

    /* initialize some align-related parameters. just for compatibility */
    gap_open = ap->gap_open;
    gap_ext = ap->gap_ext;
    gap_end = ap->gap_end;
    b = ap->band_width;
    score_matrix = ap->matrix;
    N_MATRIX_ROW = ap->row;

    if (len1 == 0 || len2 == 0)
    {
        *path_len = 0;
        return 0;
    }
    /* calculate b1 and b2 */
    if (len1 > len2)
    {
        b1 = len1 - len2 + b;
        b2 = b;
    }
    else
    {
        b1 = b;
        b2 = len2 - len1 + b;
    }
    if (b1 > len1)
        b1 = len1;

    if (b2 > len2)
        b2 = len2;

    --seq1;

    --seq2;

    /* allocate memory */
    end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);

    dpcell = (dpcell_t**)MYALLOC(sizeof(dpcell_t*) * (len2 + 1));

    for (j = 0; j <= len2; ++j)
        dpcell[j] = (dpcell_t*)MYALLOC(sizeof(dpcell_t) * end);

    for (j = b2 + 1; j <= len2; ++j)
        dpcell[j] -= j - b2;

    curr = (dpscore_t*)MYALLOC(sizeof(dpscore_t) * (len1 + 1));

    last = (dpscore_t*)MYALLOC(sizeof(dpscore_t) * (len1 + 1));

    /* set first row */
    SET_INF(*curr);

    curr->M = 0;

    for (i = 1, s = curr + 1; i < b1; ++i, ++s)
    {
        SET_INF(*s);
        set_end_D(s->D, dpcell[0] + i, s - 1);
    }

    s = curr;
    curr = last;
    last = s;

    /* core dynamic programming, part 1 */
    tmp_end = (b2 < len2)? b2 : len2 - 1;

    for (j = 1; j <= tmp_end; ++j)
    {
        q = dpcell[j];
        s = curr;
        SET_INF(*s);
        set_end_I(s->I, q, last);
        end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
        mat = score_matrix + seq2[j] * N_MATRIX_ROW;
        ++s;
        ++q;

        for (i = 1; i != end; ++i, ++s, ++q)
        {
            set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
            set_I(s->I, q, last + i);
            set_D(s->D, q, s - 1);
        }

        set_M(s->M, q, last + i - 1, mat[seq1[i]]);
        set_D(s->D, q, s - 1);

        if (j + b1 - 1 > len1)
        { /* bug fixed, 040227 */
            set_end_I(s->I, q, last + i);
        }
        else
            s->I = MINOR_INF;

        s = curr;

        curr = last;

        last = s;
    }
    /* last row for part 1, use set_end_D() instead of set_D() */
    if (j == len2 && b2 != len2 - 1)
    {
        q = dpcell[j];
        s = curr;
        SET_INF(*s);
        set_end_I(s->I, q, last);
        end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
        mat = score_matrix + seq2[j] * N_MATRIX_ROW;
        ++s;
        ++q;

        for (i = 1; i != end; ++i, ++s, ++q)
        {
            set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
            set_I(s->I, q, last + i);
            set_end_D(s->D, q, s - 1);
        }

        set_M(s->M, q, last + i - 1, mat[seq1[i]]);
        set_end_D(s->D, q, s - 1);

        if (j + b1 - 1 > len1)
        { /* bug fixed, 040227 */
            set_end_I(s->I, q, last + i);
        }
        else
            s->I = MINOR_INF;

        s = curr;

        curr = last;

        last = s;

        ++j;
    }

    /* core dynamic programming, part 2 */
    for (; j <= len2 - b2 + 1; ++j)
    {
        SET_INF(curr[j - b2]);
        mat = score_matrix + seq2[j] * N_MATRIX_ROW;
        end = j + b1 - 1;

        for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q)
        {
            set_M(s->M, q, last + i - 1, mat[seq1[i]]);
            set_I(s->I, q, last + i);
            set_D(s->D, q, s - 1);
        }

        set_M(s->M, q, last + i - 1, mat[seq1[i]]);
        set_D(s->D, q, s - 1);
        s->I = MINOR_INF;
        s = curr;
        curr = last;
        last = s;
    }

    /* core dynamic programming, part 3 */
    for (; j < len2; ++j)
    {
        SET_INF(curr[j - b2]);
        mat = score_matrix + seq2[j] * N_MATRIX_ROW;

        for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q)
        {
            set_M(s->M, q, last + i - 1, mat[seq1[i]]);
            set_I(s->I, q, last + i);
            set_D(s->D, q, s - 1);
        }

        set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
        set_end_I(s->I, q, last + i);
        set_D(s->D, q, s - 1);
        s = curr;
        curr = last;
        last = s;
    }
    /* last row */
    if (j == len2)
    {
        SET_INF(curr[j - b2]);
        mat = score_matrix + seq2[j] * N_MATRIX_ROW;

        for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q)
        {
            set_M(s->M, q, last + i - 1, mat[seq1[i]]);
            set_I(s->I, q, last + i);
            set_end_D(s->D, q, s - 1);
        }

        set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
        set_end_I(s->I, q, last + i);
        set_end_D(s->D, q, s - 1);
        s = curr;
        curr = last;
        last = s;
    }

    /* backtrace */
    i = len1;

    j = len2;

    q = dpcell[j] + i;

    s = last + len1;

    max = s->M;

    type = q->Mt;

    ctype = FROM_M;

    if (s->I > max)
    {
        max = s->I;
        type = q->It;
        ctype = FROM_I;
    }
    if (s->D > max)
    {
        max = s->D;
        type = q->Dt;
        ctype = FROM_D;
    }

    p = path;
    p->ctype = ctype;
    p->i = i;
    p->j = j; /* bug fixed 040408 */
    ++p;

    do
    {
        switch (ctype)
        {

        case FROM_M:
            --i;
            --j;
            break;

        case FROM_I:
            --j;
            break;

        case FROM_D:
            --i;
            break;
        }

        q = dpcell[j] + i;
        ctype = type;

        switch (type)
        {

        case FROM_M:
            type = q->Mt;
            break;

        case FROM_I:
            type = q->It;
            break;

        case FROM_D:
            type = q->Dt;
            break;
        }

        p->ctype = ctype;
        p->i = i;
        p->j = j;
        ++p;
    }
    while (i || j);

    *path_len = p - path - 1;

    /* free memory */
    for (j = b2 + 1; j <= len2; ++j)
        dpcell[j] += j - b2;

    for (j = 0; j <= len2; ++j)
        MYFREE(dpcell[j]);

    MYFREE(dpcell);

    MYFREE(curr);

    MYFREE(last);

    return max;
}
/*************************************************
 * local alignment combined with banded strategy *
 *************************************************/
int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
                   path_t *path, int *path_len)
{
    register NT_LOCAL_SCORE *s;
    register int i;
    int q, r, qr, tmp_len, qr_shift;
    int **s_array, *score_array;
    int e, f;
    int is_overflow, of_base;
    NT_LOCAL_SCORE *eh, curr_h, last_h, curr_last_h;
    int j, start_i, start_j, end_i, end_j;
    path_t *p;
    int score_f, score_r, score_g;
    int start, end, max_score;

    int gap_open, gap_ext, b;
    int *score_matrix, N_MATRIX_ROW;

    /* initialize some align-related parameters. just for compatibility */
    gap_open = ap->gap_open;
    gap_ext = ap->gap_ext;
    b = ap->band_width;
    score_matrix = ap->matrix;
    N_MATRIX_ROW = ap->row;

    if (len1 == 0 || len2 == 0)
        return -1;

    /* allocate memory */
    eh = (NT_LOCAL_SCORE*)MYALLOC(sizeof(NT_LOCAL_SCORE) * (len1 + 1));

    s_array = (int**)MYALLOC(sizeof(int*) * N_MATRIX_ROW);

    for (i = 0; i != N_MATRIX_ROW; ++i)
        s_array[i] = (int*)MYALLOC(sizeof(int) * len1);

    /* initialization */
    aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array);

    q = gap_open - gap_ext;

    r = gap_ext;

    qr = q + r;

    qr_shift = (qr+1) << NT_LOCAL_SHIFT;

    tmp_len = len1 + 1;

    start_i = start_j = end_i = end_j = 0;

    for (i = 0, max_score = 0; i != N_MATRIX_ROW * N_MATRIX_ROW; ++i)
        if (max_score < score_matrix[i])
            max_score = score_matrix[i];

    /* convert the coordinate */
    --seq1;

    --seq2;

    for (i = 0; i != N_MATRIX_ROW; ++i)
        --s_array[i];

    /* forward dynamic programming */
    for (i = 0, s = eh; i != tmp_len; ++i, ++s)
        *s = 0;

    score_f = 0;

    is_overflow = of_base = 0;

    for (j = 1; j <= len2; ++j)
    {
        last_h = f = 0;
        score_array = s_array[seq2[j]];

        if (is_overflow)
        { /* adjust eh[] array if overflow occurs. */
            /* If LOCAL_OVERFLOW_REDUCE is too small, optimal alignment might be missed.
             * If it is too large, this block will be excuted frequently and therefore
             * slow down the whole program.
             * Acually, smaller LOCAL_OVERFLOW_REDUCE might also help to reduce the
             * number of assignments because it sets some cells to zero when overflow
             * happens. */
            int tmp, tmp2;
            score_f -= LOCAL_OVERFLOW_REDUCE;
            of_base += LOCAL_OVERFLOW_REDUCE;
            is_overflow = 0;

            for (i = 1, s = eh; i <= tmp_len; ++i, ++s)
            {
                tmp = *s >> NT_LOCAL_SHIFT;
                tmp2 = *s & NT_LOCAL_MASK;

                if (tmp2 < LOCAL_OVERFLOW_REDUCE)
                    tmp2 = 0;
                else
                    tmp2 -= LOCAL_OVERFLOW_REDUCE;

                if (tmp < LOCAL_OVERFLOW_REDUCE)
                    tmp = 0;
                else
                    tmp -= LOCAL_OVERFLOW_REDUCE;

                *s = (tmp << NT_LOCAL_SHIFT) | tmp2;
            }

        }

        for (i = 1, s = eh; i != tmp_len; ++i, ++s)
        {
            /* prepare for calculate current h */
            curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];

            if (curr_h < 0)
                curr_h = 0;

            if (last_h > qr)
            { /* initialize f */
                f = (f > last_h - q)? f - r : last_h - qr;

                if (curr_h < f)
                    curr_h = f;
            }
            if (*(s+1) >= qr_shift)
            { /* initialize e */
                curr_last_h = *(s+1) >> NT_LOCAL_SHIFT;
                e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;

                if (curr_h < e)
                    curr_h = e;

                *s = (last_h << NT_LOCAL_SHIFT) | e;
            }
            else
                *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */

            last_h = curr_h;

            if (score_f < curr_h)
            {
                score_f = curr_h;
                end_i = i;
                end_j = j;

                if (score_f > LOCAL_OVERFLOW_THRESHOLD)
                    is_overflow = 1;
            }

        }
        *s = last_h << NT_LOCAL_SHIFT;
    }

    score_f += of_base;

    if (path == 0)
        goto end_func; /* skip path-filling */

    /* reverse dynamic programming */
    for (i = end_i, s = eh + end_i; i >= 0; --i, --s)
        *s = 0;

    if (end_i == 0 || end_j == 0)
        goto end_func; /* no local match */

    score_r = score_matrix[seq1[end_i] * N_MATRIX_ROW + seq2[end_j]];

    is_overflow = of_base = 0;

    start_i = end_i;

    start_j = end_j;

    eh[end_i] = ((NT_LOCAL_SCORE)(qr + score_r)) << NT_LOCAL_SHIFT; /* in order to initialize f and e, 040408 */

    start = end_i - 1;

    end = end_i - 3;

    if (end <= 0)
        end = 0;

    /* second pass DP can be done in a band, speed will thus be enhanced */
    for (j = end_j - 1; j != 0; --j)
    {
        last_h = f = 0;
        score_array = s_array[seq2[j]];

        if (is_overflow)
        { /* adjust eh[] array if overflow occurs. */
            int tmp, tmp2;
            score_r -= LOCAL_OVERFLOW_REDUCE;
            of_base += LOCAL_OVERFLOW_REDUCE;
            is_overflow = 0;

            for (i = start, s = eh + start + 1; i >= end; --i, --s)
            {
                tmp = *s >> NT_LOCAL_SHIFT;
                tmp2 = *s & NT_LOCAL_MASK;

                if (tmp2 < LOCAL_OVERFLOW_REDUCE)
                    tmp2 = 0;
                else
                    tmp2 -= LOCAL_OVERFLOW_REDUCE;

                if (tmp < LOCAL_OVERFLOW_REDUCE)
                    tmp = 0;
                else
                    tmp -= LOCAL_OVERFLOW_REDUCE;

                *s = (tmp << NT_LOCAL_SHIFT) | tmp2;
            }

        }

        for (i = start, s = eh + start + 1; i != end; --i, --s)
        {
            /* prepare for calculate current h */
            curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];

            if (curr_h < 0)
                curr_h = 0;

            if (last_h > qr)
            { /* initialize f */
                f = (f > last_h - q)? f - r : last_h - qr;

                if (curr_h < f)
                    curr_h = f;
            }
            if (*(s-1) >= qr_shift)
            { /* initialize e */
                curr_last_h = *(s-1) >> NT_LOCAL_SHIFT;
                e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;

                if (curr_h < e)
                    curr_h = e;

                *s = (last_h << NT_LOCAL_SHIFT) | e;
            }
            else
                *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */

            last_h = curr_h;

            if (score_r < curr_h)
            {
                score_r = curr_h;
                start_i = i;
                start_j = j;

                if (score_r + of_base - qr == score_f)
                {
                    j = 1;
                    break;
                }
                if (score_r > LOCAL_OVERFLOW_THRESHOLD)
                    is_overflow = 1;
            }

        }
        *s = last_h << NT_LOCAL_SHIFT;
        /* recalculate start and end, the boundaries of the band */

        if ((eh[start] >> NT_LOCAL_SHIFT) <= qr)
            --start;

        if (start <= 0)
            start = 0;

        end = start_i - (start_j - j) - (score_r + of_base + (start_j - j) * max_score) / r - 1;

        if (end <= 0)
            end = 0;
    }

    if (path_len == 0)
    {
        path[0].i = start_i;
        path[0].j = start_j;
        path[1].i = end_i;
        path[1].j = end_j;
        goto end_func;
    }

    score_r += of_base;
    score_r -= qr;

#ifdef DEBUG
    /* this seems not a bug */

    if (score_f != score_r)
        fprintf(stderr, "[aln_local_core] unknown flaw occurs: score_f(%d) != score_r(%d)\n", score_f, score_r);

#endif

    /* call global alignment to fill the path */
    score_g = 0;

    j = (end_i - start_i > end_j - start_j)? end_i - start_i : end_j - start_j;

    ++j; /* j is the maximum band_width */

    for (i = ap->band_width;; i <<= 1)
    {
        AlnParam ap_real = *ap;
        ap_real.gap_end = -1;
        ap_real.band_width = i;
        score_g = aln_global_core(seq1 + start_i, end_i - start_i + 1, seq2 + start_j,
                                  end_j - start_j + 1, &ap_real, path, path_len);

        if (score_g == score_r || score_f == score_g)
            break;

        if (i > j)
            break;
    }
    if (score_r > score_g && score_f > score_g)
        fprintf(stderr, "[aln_local_core] Cannot find reasonable band width. Continue anyway.\n");

    score_f = score_g;

    /* convert coordinate */
    for (p = path + *path_len - 1; p >= path; --p)
    {
        p->i += start_i - 1;
        p->j += start_j - 1;
    }

end_func:
    /* free */
    MYFREE(eh);

    for (i = 0; i != N_MATRIX_ROW; ++i)
    {
        ++s_array[i];
        MYFREE(s_array[i]);
    }

    MYFREE(s_array);
    return score_f;
}
AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int len1, int len2)
{
    unsigned char *seq11, *seq22;
    int score;
    int i, j, l;
    path_t *p;
    char *out1, *out2, *outm;
    AlnAln *aa;

    if (len1 < 0)
        len1 = strlen(seq1);

    if (len2 < 0)
        len2 = strlen(seq2);

    aa = aln_init_AlnAln();

    seq11 = (unsigned char*)MYALLOC(sizeof(unsigned char) * len1);

    seq22 = (unsigned char*)MYALLOC(sizeof(unsigned char) * len2);

    aa->path = (path_t*)MYALLOC(sizeof(path_t) * (len1 + len2 + 1));

    if (ap->row < 10)
    { /* 4-nucleotide alignment */

        for (i = 0; i < len1; ++i)
            seq11[i] = aln_nt4_table[(int)seq1[i]];

        for (j = 0; j < len2; ++j)
            seq22[j] = aln_nt4_table[(int)seq2[j]];
    }
    else if (ap->row < 20)
    { /* 16-nucleotide alignment */

        for (i = 0; i < len1; ++i)
            seq11[i] = aln_nt16_table[(int)seq1[i]];

        for (j = 0; j < len2; ++j)
            seq22[j] = aln_nt16_table[(int)seq2[j]];
    }
    else
    { /* amino acids */

        for (i = 0; i < len1; ++i)
            seq11[i] = aln_aa_table[(int)seq1[i]];

        for (j = 0; j < len2; ++j)
            seq22[j] = aln_aa_table[(int)seq2[j]];
    }

    if (is_global)
        score = aln_global_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len);
    else
        score = aln_local_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len);

    aa->score = score;

    out1 = aa->out1 = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));

    out2 = aa->out2 = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));

    outm = aa->outm = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));

    --seq1;

    --seq2;

    --seq11;

    --seq22;

    p = aa->path + aa->path_len - 1;

    for (l = 0; p >= aa->path; --p, ++l)
    {
        switch (p->ctype)
        {

        case FROM_M:
            out1[l] = seq1[p->i];
            out2[l] = seq2[p->j];
            outm[l] = (seq11[p->i] == seq22[p->j] && seq11[p->i] != ap->row)? '|' : ' ';
            break;

        case FROM_I:
            out1[l] = '-';
            out2[l] = seq2[p->j];
            outm[l] = ' ';
            break;

        case FROM_D:
            out1[l] = seq1[p->i];
            out2[l] = '-';
            outm[l] = ' ';
            break;
        }

    }
    out1[l] = out2[l] = outm[l] = '\0';
    ++seq11;
    ++seq22;

    MYFREE(seq11);
    MYFREE(seq22);

    p = aa->path + aa->path_len - 1;
    aa->start1 = p->i;
    aa->end1 = aa->path->i;
    aa->start2 = p->j;
    aa->end2 = aa->path->j;

    return aa;
}
AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int is_global)
{
    return aln_stdaln_aux(seq1, seq2, ap, is_global, -1, -1);
}
